#! /usr/bin/env python

"""This module hold function to get site-wide constants

"""
import os
import sys
import xml.dom
import xml.dom.minidom as minidom
import threading


# Name of the configuration directory; ma_conf_path() appends it to the
# module's root path.
conf_path = 'conf'
# File names that are looked up inside the configuration directory
# (see ma_logging_filepath() and ma_siteconf_filepath()).
logging_filename = 'logging.conf'
site_xml_filename = 'siteconf.xml'

# Directory separator used for DFS paths (see the get_dfs_filepath_str_data
# accessors); local paths use os.sep instead.
dfs_dir_sep = '/'

# XML element names
# Root element of siteconf.xml (read by XmlData) and of a job's own
# configuration file (read by JobsXmlData).
xml_root_element = 'configuration'
xml_job_root_element = 'job_configuration'
# NOTE(review): the names below are not referenced by the code in this file;
# presumably they are the grouping elements under the root - confirm against
# siteconf.xml.
xml_filenames_element = 'resource_names'
xml_filepaths_element = 'filepaths'
xml_res_locs_element = 'resource_locations'
xml_archi_settings_element = 'architecture_settings'
xml_net_settings_element = 'network_settings'
xml_input_settings = 'input_settings'
xml_computation_settings = 'computation_settings'

# XML child node element names
# Each constant below is the tag name of a setting element; pass it as the
# element_name argument of the XmlData / JobsXmlData accessors.

# filenames
xml_job_conf_filename = 'job_conf_filename'
xml_job_flags_filename = 'job_flags_filename'
xml_map_flags_filename = 'map_flags_filename'
xml_reduce_flags_filename = 'reduce_flags_filename'
xml_map_input_filename = 'map_input_filename'
xml_map_output_filename = 'map_output_filename'
xml_map_output_filename_with_key = 'map_output_filename_with_key'
xml_reduce_output_filename = 'reduce_output_filename'
xml_map_proc_temp_filename = 'map_proc_temp_filename'
xml_reduce_proc_temp_filename = 'reduce_proc_temp_filename'

# filepaths
xml_log_filepath = 'logconf_filepath'
xml_local_temp_dir = 'local_temp_dir'
xml_local_input_temp_dir = 'local_input_temp_dir'
xml_local_output_temp_dir = 'local_output_temp_dir'
xml_local_compute_temp_dir = 'local_compute_temp_dir'
# JobsXmlData reads this element to locate a job's own configuration file.
xml_local_job_conf_filepath = 'local_job_conf_filepath'
xml_local_job_flags_filepath = 'local_job_flags_filepath'
xml_local_map_flags_filepath = 'local_map_flags_filepath'
xml_local_reduce_flags_filepath = 'local_reduce_flags_filepath'

# resource_locations
xml_dfs_job_base_path = 'job_dir_dfs_filepath'
xml_dfs_path_job_conf = 'job_conf_dfs_filepath'
xml_dfs_path_job_flags = 'job_flags_dfs_filepath'
xml_dfs_path_job_map_flags = 'map_flags_dfs_filepath'
xml_dfs_path_job_reduce_flags = 'reduce_flags_dfs_filepath'
xml_dfs_path_job_input = 'job_input_dfs_dir'
xml_dfs_path_job_output = 'job_output_dfs_dir'
xml_hadoop_path = 'hadoop_path'
xml_hadoop_exec = 'hadoop_exec'
xml_hdfs_conf_path = 'hdfs_conf_filepath'
xml_hdfs_name_dir_re = 'hdfs_name_dir_re'
xml_hdfs_data_dir_re = 'hdfs_data_dir_re'
xml_hdfs_ver_relative_filepath = 'hdfs_ver_relative_filepath'
xml_python_exec = 'python_exec'

# architecture_settings
# Tag names of the scheduling, retry, locking and timing settings. Units and
# valid ranges are defined by the XML configuration, not by this module.
xml_max_no_tasks = 'task_capacity'
xml_map_reduce_ratio = 'map_to_reduce_ratio'
xml_no_reduce_levels = 'reduce_levels'
xml_reduce_input_ratio = 'reduce_input_ratio'
xml_map_timeout_period = 'map_timeout_period'
xml_reduce_timeout_period = 'reduce_timeout_period'
xml_processrunner_fail_retries = 'processrunner_fail_retries'
xml_max_map_attempts = 'map_max_attempts'
xml_max_reduce_attempts = 'reduce_max_attempts'
xml_dfs_copy_retries = 'dfs_copy_retries'
xml_distlock_commit_wait = 'distributed_lock_commit_wait'
xml_distlock_lease_time = 'distributed_lock_lease_time'
xml_distlock_relock_wait = 'distributed_lock_relock_wait'
xml_distlock_lock_reattempt_wait = 'distributed_lock_reattempt_wait'
xml_distlock_timer_interval = 'distributed_lock_timerclass_interval'
xml_distlock_max_lock_attempts = 'distributed_lock_max_locking_attempts'
xml_map_to_reduce_schedule_ratio = 'map_to_reduce_schedule_ratio'
xml_schedule_multiply_factor = 'schedule_multiply_factor'
# NOTE(review): the identifier is misspelled ("responce") while the tag name
# is spelled correctly; the name is kept because other modules may import it.
xml_file_req_responce_wait = 'broadcast_file_req_response_wait'
xml_out_brdcts_rqst_attempts = 'file_broadcasts_request_attempts'
xml_distlock_unlock_wait_attmpts = 'distributed_lock_unlock_wait_attempts'
xml_distlock_unlock_wait_time = 'distributed_lock_unlock_wait_time'
# xml_no_maps = 'no_maps'
xml_no_reduces = 'no_reduces'
xml_nt_heartbeat_interval = 'nodetracker_heartbeat_interval'
xml_nt_master_heartbeat_interval = 'nodetracker_master_heartbeat_interval'
xml_nt_task_expiry_interval = 'nodetracker_task_expiry_interval'
xml_task_ping_interval = 'task_ping_interval'
xml_check_task_expiry_interval = 'check_task_expiry_interval'
xml_check_on_timers_interval = 'check_on_timers_interval'
xml_child_process_niceness = 'child_process_niceness'
xml_stats_interval = 'stats_interval'
xml_stats_buffer_size = 'stats_buffer_size'
xml_stats_file_size_div = 'stats_file_size_divider'
xml_reduce_secs_inp_scaleup = 'reduce_secs_from_inp_scaleup'
xml_brickwall_mrplus = 'mrplus_do_brickwall'
xml_order_by_structs = 'mrplus_order_by_structs'
xml_estimation_ignore = 'mrplus_estimation_ignore'
xml_schedule_bias_for_large_levels = 'large_levels_schedule_bias'

# network_settings
# Tag names of the addresses, ports and buffer sizes used for communication.
xml_network_interface = 'communicating_interface'
xml_hdfs_host = 'hdfs_host'
xml_hdfs_port = 'hdfs_port'
xml_binding_address = 'binding_address'
xml_broadcast_address = 'broadcast_address'
xml_broadcast_port = 'broadcast_port'
xml_broadcast_buff_size = 'broadcast_packet_buffer'
xml_tcp_out_srv_bind_addr = 'tcp_output_srv_bind_address'
xml_net_output_srv_bind_port = 'net_output_srv_bind_port'
xml_net_file_transfer_buff_size = 'net_file_transfer_buffer'
xml_net_command_buff_size = 'net_command_buffer'
xml_master_ip = 'mapred_master_ip'
xml_master_port = 'mapred_master_port'

# input_settings
# Tag names of the chunk sizes used when reading map and reduce input.
xml_map_inp_chunk_size = 'map_input_chunk_size'
xml_reduce_inp_read_size = 'reduce_input_read_chunk'
xml_reduce_inp_compute_size = 'reduce_input_compute_chunk'

# computation_settings
# Tag names of the user map/reduce module and class settings, the estimation
# thresholds and the task runner settings.
xml_map_module_name = 'map_module'
xml_map_class_name = 'map_class'
xml_reduce_module_name = 'reduce_module'
xml_reduce_class_name = 'reduce_class'
xml_est_reduce_margin = 'change_estimation_reduce_margin'
xml_min_inputs_for_change = 'min_inputs_to_declare_not_changed'
xml_threshold_value = 'threshold_value'
xml_mrplus_maptask_runner = 'mrplus_map_task_runner'
xml_mr_maptask_runner = 'mr_map_task_runner'
xml_mrplus_redtask_runner = 'mrplus_reduce_task_runner'
xml_mr_redtask_runner = 'mr_reduce_task_runner'

# XML derived place-holders
# Maps a place-holder token that may appear inside an element's text to the
# name of the element whose value replaces it. The substitution is done by
# the __replace_placeholders helpers of XmlData and JobsXmlData.
xml_derived_placeholders = {'[temp_dir]':xml_local_temp_dir,
                            '[dfs_jobs_path]':xml_dfs_job_base_path,
                            '[job_conf_filename]':xml_job_conf_filename,
                            '[job_flags_filename]':xml_job_flags_filename,
                            '[map_flags_filename]':xml_map_flags_filename,
                            '[reduce_flags_filename]':xml_reduce_flags_filename,
                            '[map_out_filename]':xml_map_output_filename,
                            '[hadoop_path]':xml_hadoop_path}

# User provided place-holders
# Tokens that JobsXmlData.get_unicode_data replaces with the job_id, task_id
# and split_id arguments supplied by the caller.
job_id_placeholder = '[job_id]'
task_id_placeholder = '[task_id]'
split_id_placeholder = '[split_id]'


def ma_root_path():
    """Return the absolute path of the directory containing this module.

    The result always ends with the platform's directory separator. The
    project layout refers to this directory as MRImprov/src/ma.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.abspath(module_dir) + os.sep

def ma_conf_path():
    """Return the configuration directory path under the module root.

    The result is the root path followed by the conf directory name and a
    trailing directory separator.
    """
    return ''.join((ma_root_path(), conf_path, os.sep))

def ma_logging_filepath():
    """Return the full path of logging.conf inside the conf directory."""
    return ma_conf_path() + logging_filename

def ma_siteconf_filepath():
    """Return the full path of siteconf.xml inside the conf directory."""
    return ma_conf_path() + site_xml_filename


class XmlData(object):
    """Read configuration values from the site-wide siteconf.xml file.

    The file is parsed lazily on the first lookup and the resulting DOM is
    cached on the class, so every accessor is a static method and no instance
    is required. JobsXmlData falls back to the values read here when a job
    does not override a setting.
    """

    # parsed DOM document of siteconf.xml; None until the first lookup
    __parse_data = None
    # root configuration element of the parsed document
    __data = None

    @staticmethod
    def __initialize():
        """Parse siteconf.xml and cache the document and its root element.

        Propagates the errors raised by minidom.parse for a missing or
        malformed file, and IndexError when the root element is absent.
        """
        document = minidom.parse(ma_siteconf_filepath())
        root = document.getElementsByTagName(xml_root_element)[0]
        # publish both references only after parsing fully succeeded, so a
        # failure never leaves the cache half-populated
        XmlData.__parse_data = document
        XmlData.__data = root

    @staticmethod
    def get_unicode_data(element_name):
        """Return the text of the first element named element_name.

        Derived place-holders (see xml_derived_placeholders) found in the
        text are replaced before it is returned.

        Returns None when no such element exists. An xml.dom.NotFoundErr is
        reported on stdout and also results in None. Any other error is
        reported on stdout and re-raised; this includes the IndexError raised
        for an element that has no child nodes.
        """
        try:
            if XmlData.__data is None:
                XmlData.__initialize()

            matches = XmlData.__data.getElementsByTagName(element_name)
            # the element could not be found
            if not matches:
                return None
            raw_text = matches[0].childNodes[0].data
            return XmlData.__replace_placeholders(raw_text)
        except xml.dom.NotFoundErr as err_msg:
            print('XML ERROR - could not find the requested element in the XML: %s' % err_msg)
        except Exception:
            print('XML ERROR - ', sys.exc_info()[0])
            raise

    @staticmethod
    def get_str_data(element_name):
        """Return the requested element as a string, or None if missing."""
        return XmlData.get_unicode_data(element_name)

    @staticmethod
    def get_int_data(element_name):
        """Return the requested element converted into an integer.

        Returns None when the element is missing. A value that is not a
        valid integer is reported on stdout and also yields None.
        """
        try:
            text = XmlData.get_unicode_data(element_name)
            if text is None:
                return None
            return int(text)
        except ValueError:
            print('XML ERROR - could not convert the requested element into int')

    @staticmethod
    def get_float_data(element_name):
        """Return the requested element converted into a float.

        Returns None when the element is missing. A value that is not a
        valid float is reported on stdout and also yields None.
        """
        try:
            text = XmlData.get_unicode_data(element_name)
            if text is None:
                return None
            return float(text)
        except ValueError:
            print('XML ERROR - could not convert the requested element into float')

    @staticmethod
    def get_filepath_str_data(element_name):
        """Return the requested element as a local directory or file path.

        Every '/' in the configured value is replaced by the separator of
        the current file system (os.sep). Returns None when the element is
        missing.
        """
        text = XmlData.get_unicode_data(element_name)
        if text is None:
            return None
        return text.replace('/', os.sep)

    @staticmethod
    def get_dfs_filepath_str_data(element_name):
        """Return the requested element as a DFS directory or file path.

        Every '/' in the configured value is replaced by the DFS separator
        (dfs_dir_sep). Returns None when the element is missing.
        """
        text = XmlData.get_unicode_data(element_name)
        if text is None:
            return None
        return text.replace('/', dfs_dir_sep)

    @staticmethod
    def __replace_placeholders(element_value):
        """Return element_value with all derived place-holders replaced.

        Each place-holder is replaced by the value of the element it maps to
        in xml_derived_placeholders. A place-holder whose element is missing
        is reported on stdout and left untouched, instead of failing with a
        TypeError inside str.replace.
        """
        for placeholder, xml_elem in xml_derived_placeholders.items():
            if placeholder not in element_value:
                continue
            replace_val = XmlData.get_unicode_data(xml_elem)
            if replace_val is None:
                print('XML ERROR - no value found for place-holder %s (element %s)'
                      % (placeholder, xml_elem))
                continue
            element_value = element_value.replace(placeholder, replace_val)
        return element_value

    def __del__(self):
        """Unlink the cached DOM tree when an instance is destroyed.

        The cache is reset first so that a later lookup parses the file again
        rather than reading from an unlinked (emptied) tree. Nothing happens
        if the file was never parsed.
        """
        document = XmlData.__parse_data
        if document is None:
            return
        XmlData.__parse_data = None
        XmlData.__data = None
        document.unlink()



class JobsXmlData(object):
    """Read the settings of a specific job.

    Values are looked up in the job's own configuration file (jobconf.xml)
    first. If the file or the requested element is missing, the value is
    pulled from the default siteconf.xml through XmlData. Parsed job files
    are cached per job_id on the class, so all accessors are static methods.
    """

    # parsed DOM documents, keyed by job_id
    __parse_data = {}
    # root job configuration elements, keyed by job_id
    __data = {}

    @staticmethod
    def __initialize(job_id):
        """Parse the configuration file of job_id and cache it.

        Returns True when the file was found and parsed. Returns False, after
        printing a warning, when the file path is not configured or the file
        does not exist.
        """
        job_xml_conf_filepath = XmlData.get_filepath_str_data(xml_local_job_conf_filepath)
        if job_xml_conf_filepath is None:
            # without a configured location there is no job file to read;
            # behave like a missing file so the site defaults are used
            print('WARNING job_conf filepath not configured:', xml_local_job_conf_filepath)
            return False

        job_xml_conf_filepath = job_xml_conf_filepath.replace(job_id_placeholder, str(job_id))
        if not os.path.exists(job_xml_conf_filepath):
            print('WARNING job_conf file not found:', job_xml_conf_filepath)
            return False

        document = minidom.parse(job_xml_conf_filepath)
        root = document.getElementsByTagName(xml_job_root_element)[0]
        # publish both references only after parsing fully succeeded
        JobsXmlData.__parse_data[job_id] = document
        JobsXmlData.__data[job_id] = root
        return True

    @staticmethod
    def reinitialize(job_id=None):
        """Drop the cached XML DOM data of job_id.

        If no job_id is specified, the cached data of all jobs is dropped.
        The dropped DOM trees are unlinked so they are released promptly.
        """
        if job_id is None:
            # if no job_id specified, delete all data
            stale_trees = list(JobsXmlData.__parse_data.values())
            JobsXmlData.__parse_data = {}
            JobsXmlData.__data = {}
        else:
            # if job_id specified, delete only its data
            JobsXmlData.__data.pop(job_id, None)
            dropped = JobsXmlData.__parse_data.pop(job_id, None)
            stale_trees = [] if dropped is None else [dropped]

        for dom_tree in stale_trees:
            dom_tree.unlink()

    @staticmethod
    def get_unicode_data(element_name, job_id, task_id=None, split_id=None):
        """Return the text of element_name for the given job.

        The job configuration is consulted first; if the job file or the
        element is missing, the site-wide value from XmlData is used. Derived
        place-holders are replaced, then the job_id place-holder, and finally
        the task_id and split_id place-holders when those arguments are given.

        Returns None when the element exists in neither configuration. An
        xml.dom.NotFoundErr is reported on stdout and also results in None.
        Any other error is reported on stdout and re-raised; this includes
        the IndexError raised for an element that has no child nodes.
        """
        try:
            if job_id in JobsXmlData.__data:
                file_exists = True
            else:
                file_exists = JobsXmlData.__initialize(job_id)

            # try to fetch data from job conf if file is present
            matches = []
            if file_exists:
                matches = JobsXmlData.__data[job_id].getElementsByTagName(element_name)

            if matches:
                text = matches[0].childNodes[0].data
                # replace all derived place-holders
                text = JobsXmlData.__replace_placeholders(text, job_id)
            else:
                # fall back to the site-wide default
                text = XmlData.get_unicode_data(element_name)
                if text is None:
                    return None

            # replace job_id place-holders
            text = text.replace(job_id_placeholder, str(job_id))

            # replace all other place-holders
            if task_id is not None:
                text = text.replace(task_id_placeholder, str(task_id))
            if split_id is not None:
                text = text.replace(split_id_placeholder, str(split_id))

            return text
        except xml.dom.NotFoundErr as err_msg:
            print('XML ERROR - could not find the requested element in the XML: %s' % err_msg)
        except Exception:
            print('XML ERROR - ', sys.exc_info()[0])
            raise

    @staticmethod
    def get_str_data(element_name, job_id, task_id=None, split_id=None):
        """Return the requested element as a string, or None if missing."""
        return JobsXmlData.get_unicode_data(element_name, job_id, task_id, split_id)

    @staticmethod
    def get_int_data(element_name, job_id):
        """Return the requested element converted into an integer.

        Returns None when the element is missing. A value that is not a
        valid integer is reported on stdout and also yields None. The text is
        parsed as-is, matching XmlData.get_int_data, rather than first
        dropping its non-ASCII characters.
        """
        try:
            text = JobsXmlData.get_unicode_data(element_name, job_id)
            if text is None:
                return None
            return int(text)
        except ValueError:
            print('XML ERROR - could not convert the requested element into int')

    @staticmethod
    def get_float_data(element_name, job_id):
        """Return the requested element converted into a float.

        Returns None when the element is missing. A value that is not a
        valid float is reported on stdout and also yields None. The text is
        parsed as-is, matching XmlData.get_float_data, rather than first
        dropping its non-ASCII characters.
        """
        try:
            text = JobsXmlData.get_unicode_data(element_name, job_id)
            if text is None:
                return None
            return float(text)
        except ValueError:
            print('XML ERROR - could not convert the requested element into float')

    @staticmethod
    def get_filepath_str_data(element_name, job_id):
        """Return the requested element as a local directory or file path.

        Every '/' in the configured value is replaced by the separator of
        the current file system (os.sep). Returns None when the element is
        missing.
        """
        text = JobsXmlData.get_unicode_data(element_name, job_id)
        if text is None:
            return None
        return text.replace('/', os.sep)

    @staticmethod
    def get_dfs_filepath_str_data(element_name, job_id):
        """Return the requested element as a DFS directory or file path.

        Every '/' in the configured value is replaced by the DFS separator
        (dfs_dir_sep). Returns None when the element is missing.
        """
        text = JobsXmlData.get_unicode_data(element_name, job_id)
        if text is None:
            return None
        return text.replace('/', dfs_dir_sep)

    @staticmethod
    def __replace_placeholders(element_value, job_id=None):
        """Return element_value with all derived place-holders replaced.

        Each place-holder is replaced by the value of the element it maps to
        in xml_derived_placeholders. With a job_id the value is resolved
        through the job configuration (which itself falls back to the site
        defaults); without one it is read from XmlData directly. A
        place-holder whose element is missing is reported on stdout and left
        untouched.
        """
        for placeholder, xml_elem in xml_derived_placeholders.items():
            if placeholder not in element_value:
                continue
            if job_id is None:
                replace_val = XmlData.get_unicode_data(xml_elem)
            else:
                # the lookup needs the job_id; omitting it raised a TypeError
                replace_val = JobsXmlData.get_unicode_data(xml_elem, job_id)
            if replace_val is None:
                print('XML ERROR - no value found for place-holder %s (element %s)'
                      % (placeholder, xml_elem))
                continue
            element_value = element_value.replace(placeholder, replace_val)
        return element_value

    def __del__(self):
        """Unlink all cached DOM trees when an instance is destroyed.

        The caches are emptied as well, so later lookups parse the job files
        again rather than reading from unlinked (emptied) trees.
        """
        JobsXmlData.reinitialize()



if __name__ == '__main__':
    # Manual smoke test: prints a selection of site-wide and job-specific
    # values. It needs conf/siteconf.xml next to this module; job values are
    # looked up for job_id 1 and fall back to the site defaults when the job
    # has no configuration file of its own.
    print("Logging Filepath: ", ma_logging_filepath())
    print("Site conf. xml Filepath: ", ma_siteconf_filepath())
    print("Logging Filepath: ", XmlData.get_filepath_str_data(xml_log_filepath))
    print("HDFS Host: ", XmlData.get_str_data(xml_hdfs_host))
    print("HDFS Port: ", XmlData.get_int_data(xml_hdfs_port))
    print("Task capacity: ", XmlData.get_int_data(xml_max_no_tasks))
    print("Broadcast Addy: ", XmlData.get_str_data(xml_broadcast_address))
    print("Binding Addy: ", XmlData.get_str_data(xml_binding_address))
    print("Communication Interface: ", XmlData.get_str_data(xml_network_interface))
    # a local path, so it is read with the local (os.sep) accessor rather
    # than the DFS one
    print("Reduce Flags local filepath: ", JobsXmlData.get_filepath_str_data(xml_local_reduce_flags_filepath, 1))
    # NOTE(review): this reads the map module name through the file path
    # accessor - confirm that is intended
    print("Output Map path function: ", JobsXmlData.get_filepath_str_data(xml_map_module_name, 1))
    print("Output Reduce filename: ", JobsXmlData.get_str_data(xml_reduce_output_filename, 1, 12))
    print("Output Map filename with key: ", JobsXmlData.get_str_data(xml_map_output_filename_with_key, 1, 12, 55))
    print("Local output temp dir: ", JobsXmlData.get_filepath_str_data(xml_local_output_temp_dir, 1))
